/*
This code attempts to estimate treatment effects of ppp
*/

do "${dodir}/make_globals.do"

ssc install drdid, all replace
ssc install csdid, all replace


////////////////////////////////////////////////////////////////////////////////
// step 1: aggregate 941 data
////////////////////////////////////////////////////////////////////////////////

use id q min_emp_2018 med_wage total_compensation num_employees g forgivenessamount_first ///
 using ${datadir}/reg_data, clear

keep if forgivenessamount_first>0 & forgivenessamount_first~=.

egen first_q=min(q), by(id)

preserve

	gcollapse (lastnm) forgivenessamount_first first_q, by(id)

	tabstat forgivenessamount_first, by(first_q) s(sum)
	tabstat forgivenessamount_first if first_q<9, s(sum)
	tabstat forgivenessamount_first if first_q>=9, s(sum)

restore
	
* dropping if we do not observe you prior to 2020. Results in  9.53e+09 of ppp being dropped
keep if q<9

rename forgivenessamount_first ppp1_amt

// size bin
g firm_size_bin=0 if min_emp_2018<10
	replace firm_size_bin=1 if min_emp_2018>=10 & min_emp_2018<50
	replace firm_size_bin=2 if min_emp_2018>=50 & min_emp_2018<100
	replace firm_size_bin=3 if min_emp_2018>=100 & min_emp_2018<250
	replace firm_size_bin=4 if min_emp_2018>=250
	
// define base period

tab g

g base=0
	replace base=1 if g==10 & q>=2 & q<6
	replace base=1 if g==11 & q>=3 & q<7
	replace base=1 if g==13 & q>=5 & q<9
	replace base=1 if g==14 & q>=6 & q<10

// BUT some firms did not file 941s in each base quarter	

// calculate current total base quarters
egen base_sum=sum(base), by(id)

bysort id: gen obs=_n

// if base less than 4, replace with first four quarters
replace base=0 if base_sum<4
replace base=1 if obs<5 & base_sum<4
drop base_sum
egen base_sum=sum(base), by(id)
tab base_sum

// if 1, assign to second through fourth observed quarters, etc

tab obs if base_sum==1 & base==1

replace base=1 if obs<5 & base_sum==1
drop base_sum
egen base_sum=sum(base), by(id)
tab base_sum

// clean the compensation variables
replace med_wage=0 if med_wage==.
replace total_compensation=0 if total_compensation==.

// have wages be the max of med_wages and compensation
replace med_wage=max(med_wage, total_compensation)

gen mean_comp=med_wage/num_employees

replace med_wage=med_wage*4 if base_sum==1 & base==1
replace med_wage=med_wage*2 if base_sum==2 & base==1
replace med_wage=med_wage*4/3 if base_sum==3 & base==1
	
egen base_comp=sum(med_wage) if base==1, by(id)
egen base_emp=mean(num_employees) if base==1, by(id)
	
gcollapse (lastnm) base_comp base_emp ppp1_amt firm_size_bin, by(id)

gcollapse (sum) base_emp  base_comp ppp1_amt, by(firm_size_bin)

save $datadir/firm_size_base, replace

////////////////////////////////////////////////////////////////////////////////
// step 2: read in wage regression results
////////////////////////////////////////////////////////////////////////////////

clear

forval xx=0/5{
	append using ${datadir}/reg_tables/reg_table_growth_bin`xx'
}

g ATT_coeff=ATT if stat=="coeff"
g ATT_bot_ci=ATT if stat=="bot_95"
g ATT_top_ci=ATT if stat=="top_95"

collapse (lastnm) ATT_coeff ATT_bot_ci ATT_top_ci, by(firm_size_bin)

// merge on firm size base
merge 1:1 firm_size_bin using $datadir/firm_size_base, update nogen

export excel $datadir/effects_by_firm_size.xlsx, sheet("background_wage") sheetreplace


////////////////////////////////////////////////////////////////////////////////
// step 3: read in employee regression results
////////////////////////////////////////////////////////////////////////////////

clear

forval xx=0/5{
	append using ${datadir}/reg_tables/reg_emp_table_growth_bin`xx'
}

g ATT_coeff=ATT if stat=="coeff"
g ATT_bot_ci=ATT if stat=="bot_95"
g ATT_top_ci=ATT if stat=="top_95"

collapse (lastnm) ATT_coeff ATT_bot_ci ATT_top_ci, by(firm_size_bin)

// merge on firm size base
merge 1:1 firm_size_bin using $datadir/firm_size_base, update nogen

export excel $datadir/effects_by_firm_size.xlsx, sheet("background_emp") sheetreplace
